library(tidyverse)
library(haven)

## Set the working directory to the Dataverse
## replication folder before running this script.

# Load the raw study no. 7883 file; the path is relative to the working
# directory.
survey <- haven::read_spss(
  file = "Harris_Data/Harris 1978 Omnibus Economic and Personal Lifestyles Survey, study no. 7883/harris_s7883_spss.por"
)

# pid: sequential row identifier.
# seq_len() is used instead of 1:nrow() so that a data set with zero rows
# produces an empty id vector rather than c(1, 0), which cannot be assigned.
survey$pid <- seq_len(nrow(survey))

# study: study number, stored as text
survey$study <- as.character(7883)

# study year (year)
survey$year <- 1978

# geographic data (urban): set to NA for every row in this script
survey$urban <- NA

# geographic data (region): set to NA for every row in this script
survey$region <- NA

# respondent head of household (hh): set to NA for every row in this script
survey$hh <- NA

# increasing inequality (inequality): text labels for the Q12E_1 codes.
# Codes without a label below are kept as their character value.
survey <- dplyr::mutate(
  survey,
  inequality = dplyr::recode(
    as.character(Q12E_1),
    "1" = "Feel",
    "2" = "Not Feel",
    "3" = "Not Sure"
  ),
  # inequality variable (inequality.variable): constant 1 for every row
  inequality.variable = 1
)

# union membership: union.self is labelled from F5_1, union.other from F5_2.
# Rows where F5_4 equals 1 are then overwritten with "Not sure" in both
# columns; which() skips rows where F5_4 is missing.
survey <- dplyr::mutate(
  survey,
  union.self = dplyr::recode(as.character(F5_1), "0" = "No", "1" = "Yes"),
  union.other = dplyr::recode(as.character(F5_2), "0" = "No", "1" = "Yes"),
  union.other = replace(union.other, which(F5_4 == 1), "Not sure"),
  union.self = replace(union.self, which(F5_4 == 1), "Not sure")
)

# employment (employed): text labels for the F1A codes
survey <- dplyr::mutate(
  survey,
  employed = dplyr::recode(
    as.character(F1A),
    "1" = "Hourly wage worker",
    "2" = "Salaried",
    "3" = "Self-employed",
    "4" = "Retired",
    "5" = "Unemployed",
    "6" = "Student",
    "7" = "Military service",
    "8" = "Housewife",
    "9" = "Disabled",
    "10" = "Other (Specify)"
  ),
  # employed.self: set to NA for every row in this script
  employed.self = NA
)

# occupation: text labels for the F1B codes
survey <- dplyr::mutate(
  survey,
  occupation = dplyr::recode(
    as.character(F1B),
    "1" = "Professional",
    "2" = "Manager, official",
    "3" = "Proprietor (small business)",
    "4" = "Clerical worker",
    "5" = "Sales worker",
    "6" = "Skilled craftsman, foreman",
    "7" = "Operative, unskilled laborer (except farm)",
    "8" = "Service worker",
    "9" = "Farmer, farm manager, farm laborer",
    "10" = "Other (SPECIFY)"
  ),
  # occupation.self: set to NA for every row in this script
  occupation.self = NA
)

survey <- dplyr::mutate(
  survey,
  # household size (hhsize): set to NA for every row in this script
  hhsize = NA,
  # education (educ): text labels for the F4 codes
  educ = dplyr::recode(
    as.character(F4),
    "1" = "No formal schooling (0 years)",
    "2" = "First through 7th grade (1-7 years of school completed)",
    "3" = "8th grade (8 years of school completed)",
    "4" = "Some high school (9-11 years of school completed)",
    "5" = "High school graduate (12 years of school completed)",
    "6" = "Some college (1-3 years of college completed)",
    "7" = "Two year college graduate (completed 2 years community college, etc.)",
    "8" = "Four year college graduate (completed 4 years of college)",
    "9" = "Post graduate (4 year college graduate and completed at least 1 year of graduate school)",
    "10" = "Refused"
  )
)
                         

# household income (income): text labels for the F6 codes
survey <- dplyr::mutate(
  survey,
  income = dplyr::recode(
    as.character(F6),
    "1" = "Under $5,000",
    "2" = "$5,000 to $6,999",
    "3" = "$7,000 to $9,999",
    "4" = "$10,000 to $14,999",
    "5" = "$15,000 to $19,999",
    "6" = "$20,000 to $24,999",
    "7" = "$25,000 to $34,999",
    "8" = "$35,000 and over"
  )
)

# age: text labels for the F3 codes
survey <- dplyr::mutate(
  survey,
  age = dplyr::recode(
    as.character(F3),
    "1" = "18 to 20",
    "2" = "21 to 24",
    "3" = "25 to 29",
    "4" = "30 to 34",
    "5" = "35 to 39",
    "6" = "40 to 49",
    "7" = "50 to 64",
    "8" = "65 and over"
  )
)

# race: text labels for the F8 codes
survey <- dplyr::mutate(
  survey,
  race = dplyr::recode(
    as.character(F8),
    "1" = "White",
    "2" = "Black",
    "3" = "Oriental",
    "4" = "Spanish-American (Puerto Rican, Mexican-American, etc.)",
    "5" = "Other (Specify)",
    "6" = "Not Sure"
  )
)


survey <- dplyr::mutate(
  survey,
  # politics (party): set to NA for every row in this script
  party = NA,
  # politics (ideology): set to NA for every row in this script
  ideology = NA,
  # gender: text labels for the F9 codes
  gender = dplyr::recode(
    as.character(F9),
    "1" = "Male",
    "2" = "Female"
  )
)


# religion: text labels for the F7 codes
survey <- dplyr::mutate(
  survey,
  religion = dplyr::recode(
    as.character(F7),
    "1" = "Protestant",
    "2" = "Catholic",
    "3" = "Jewish",
    "4" = "Other(SPECIFY)",
    "5" = "None",
    "6" = "Not sure"
  )
)

survey <- dplyr::mutate(
  survey,
  ## factuals: set to NA for every row in this script
  factual1 = NA,
  factual2 = NA,
  factual3 = NA,
  ## alienation index: the three items share the same labels
  ## (dontcare from Q12E_6, dontcount from Q12E_5, leftout from Q12E_9)
  dontcare = dplyr::recode(
    as.character(Q12E_6),
    "1" = "Feel",
    "2" = "Not Feel",
    "3" = "Not Sure"
  ),
  dontcount = dplyr::recode(
    as.character(Q12E_5),
    "1" = "Feel",
    "2" = "Not Feel",
    "3" = "Not Sure"
  ),
  leftout = dplyr::recode(
    as.character(Q12E_9),
    "1" = "Feel",
    "2" = "Not Feel",
    "3" = "Not Sure"
  )
)


## question place: constant label for every row
survey <- dplyr::mutate(survey, question_place = "no party question")

# subset: keep only the columns listed below, in this order
survey_7883 <- dplyr::select(
  survey,
  dplyr::all_of(c(
    "pid", "study", "year",
    "urban", "region", "hh",
    "inequality", "inequality.variable",
    "union.self", "union.other",
    "employed", "employed.self",
    "occupation", "occupation.self",
    "hhsize", "educ", "income",
    "age", "race", "party", "ideology", "gender", "religion",
    "factual1", "factual2", "factual3",
    "dontcare", "dontcount", "leftout",
    "question_place"
  ))
)


# save file
#saveRDS(survey_7883, file = "Harris_Data/survey_7883.rds")
